###################################################
################Replication Files##################
###########Criminal Violence in Mexico#############
##### Jane Esberg###################################

#run on macOS Sonoma 14.6.1
#Rstudio version 2023.09.0+463
#R version 4.3.1 Beagle Scouts

#######################################
#########LOADING PACKAGES, FUNCTIONS###
#######################################

#install.packages('pacman')
#library(pacman)
#remotes::install_github("shuo-zhang-ucsb/did_multiplegt") # Install (only need to run once or when updating)

pacman::p_load(lmtest, plm, ggplot2, tidyr, broom, stargazer,
               fixest, this.path, bacondecomp, did, stringr,
               stringi, plyr, dplyr, DIDmultiplegt, gridExtra,
               extrafont, sf, RColorBrewer)

sessionInfo()
#sets working directory to current folder
setwd(dirname(this.path()))

#create an empty file for outputs
dir.create('output', recursive = FALSE, mode = "0777", showWarnings = TRUE)

#function to get models in one line.
#inputs: dependent variables, independent variables, data, any covariates, 
#what to call covariates in the model (covariate_names)
#outputs: list of models
returnModels<-function(dvs, ivs, data, covariates=NULL, covariate_names=NULL){
  model_list=list() #empty list of models
  model_names=list() #empty list of model names
  
  for (i in 1:length(dvs)){
    for (j in 1:length(ivs)){
      dv<-dvs[i]
      iv<-ivs[j]
      formula<-as.formula(paste(dv, "~",iv, covariates, '|muni_id+year'))
      
      model <- feols(formula,cluster = ~ muni_id,
                     data = data)
      
      model_list<-append(model_list, list(model))
    }
  }
  
  return(model_list)
}

#code for event study plots - extracts data on years
#inputs: list of models, dependent variables list, comparison year,
#term that distinguishes the interaction terms of interest
#outputs: data formatted for event study plot

event_study_data<-function(models, dvs, base_year, searchvar){
  year_df<-data.frame(tidy(models[[1]], conf.int=T))
  year_df<-subset(year_df, str_detect(year_df$term, searchvar)==TRUE)
  year_df$year<-str_replace(year_df$term, searchvar, "")
  new_row<-data.frame(cbind(as.character(base_year), 0, 0, 0, 0, 0,0, base_year))
  colnames(new_row)<-colnames(year_df)
  year_df<-rbind(year_df, new_row)
  year_df$estimate<-as.numeric(as.character(year_df$estimate))
  year_df$conf.low<-as.numeric(as.character(year_df$conf.low))
  year_df$conf.high<-as.numeric(as.character(year_df$conf.high))
  year_df$dv<-dvs[1]
  
  
  if (length(models)==1){
    year_df$year<-as.numeric(as.character(year_df$year))
    year_df<-subset(year_df, is.na(year)==F)
    return(year_df)
  }
  else{
  for (i in 2:length(models)){
    year_df_new<-data.frame(broom::tidy(models[[i]], conf.int=T))
    year_df_new<-subset(year_df_new, str_detect(year_df_new$term, searchvar)==TRUE)
    year_df_new$year<-str_replace(year_df_new$term, searchvar, "")
    new_row<-data.frame(cbind(as.character(base_year), 0, 0, 0, 0, 0,0, base_year))
    colnames(new_row)<-colnames(year_df_new)
    year_df_new<-rbind(year_df_new, new_row)
    year_df_new$estimate<-as.numeric(as.character(year_df_new$estimate))
    year_df_new$conf.low<-as.numeric(as.character(year_df_new$conf.low))
    year_df_new$conf.high<-as.numeric(as.character(year_df_new$conf.high))
    year_df_new$dv<-dvs[i]
    
    year_df<-rbind(year_df, year_df_new)
    
  }
  year_df$year<-as.numeric(as.character(year_df$year))
  year_df<-subset(year_df, is.na(year)==F)
  
  return(year_df)
  }
}

#setting look and style for event study plots
es_base_plot<-ggplot()+  theme(plot.title = element_text(hjust = 0.5))+ 
  theme_bw()+
  theme(plot.title = element_text(hjust = 0.5, size=16),
        text = element_text(size=18),
        legend.position='none')+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_linetype_discrete(name='', guide=guide_legend())+
  geom_hline(yintercept=0, linetype='dashed')

#set style and variable names for etable
setFixest_etable(style.tex = style.tex("aer"), 
                 notes={'\\scriptsize{$*** p < 0.001, ** p< 0.01, * p< 0.05, + p<.01$. Robust SEs clustered by municipality. Controls for mayoral party, poppy eradication, and marijuana eradication, plus state-year linear time trends.}'},
                 fitstat = ~ r2 + n + my,
                 digits=2,
                 digits.stats=2,
                 dict=c('muni_id'='municipality',
                        'dominant'='Major',
                        'small_groups'='Minor',
                        'kingpin_tr'='Kingpin Removal',
                        'gasxprice'='Gas Pipeline x Price',
                        'log(mariguana_kghec+1)'='Marijuana Hectares',
                        'log(amapola_kghec+1)'='Poppy Hectares',
                        'pan'='PAN party mayor',
                        'kingpin_tr_on'='Kingpin Removal (On)',
                        'kingpin_tr_ypre'='Kingpin Removal (presence=t-1)',
                        'kingpin_tr_yof'='Kingpin Removal (presence=t)',
                        'gasbinxprice'='Pipeline Presence x Price',
                        'gas_post17'='Gas Pipeline x Post-2017',
                        'distxprice'='Distance (log) x Price',
                        'gasxprice2020'='Gas Pipeline (2020) x Price',
                        'log(POBTOT+1)'='Log Population',
                        'illiteracy'='% Illiterate',
                        'piso_de_tierra'='% Earth Floor',
                        'indigenous_pop'='% Indigenous'))

#########################################
#######LOADING/MERGING DATA ############
########################################
mx_panel<-read.csv('mx_panel.csv') #general panel data
crim<-read.csv('criminal_groups.csv') #crime groups data
vars<-c('inegi', 'year', setdiff(colnames(crim), colnames(mx_panel)))

mx_panel<-merge(mx_panel, crim[vars], by=c('year', 'inegi'), all.x=T)

#grouping small orgs
mx_panel$small_groups<-mx_panel$unaffiliated+mx_panel$splinters+mx_panel$cells

#no emergence or expansion in first year of data
mx_panel <- mx_panel %>%
  mutate(across(c(splinters_emergence, cells_emergence, unaffiliated_emergence,
                  splinters_expansion, cells_expansion, unaffiliated_expansion),
                ~ ifelse(year == 2009, NA, .)))

mx_panel$emergence_small<-mx_panel$unaffiliated_emergence+mx_panel$splinters_emergence+mx_panel$cells_emergence
mx_panel$expansion_small<-mx_panel$unaffiliated_expansion+mx_panel$splinters_expansion+mx_panel$cells_expansion


#################################
###FIGURE 1: TERRITORY MAPS######
#################################
munis <- st_read("muni_2012gw/Muni_2012gw.shp") #shapefile
munis$inegi<-paste(munis$CVE_ENT, munis$CVE_MUN, sep='') #ensure inegi codes match
munis$inegi<-as.numeric(as.character(munis$inegi))

mx_panel$groups_factor<-NA #define levels for map
mx_panel$groups_factor[mx_panel$unique_groups==0]<-'0'
mx_panel$groups_factor[mx_panel$unique_groups==1]<-'1'
mx_panel$groups_factor[mx_panel$unique_groups>=2&mx_panel$unique_groups<=4]<-'2-4'
mx_panel$groups_factor[mx_panel$unique_groups>=5&mx_panel$unique_groups<=8]<-'5-8'
mx_panel$groups_factor[mx_panel$unique_groups>8]<-'>8'

for (i in c(2010, 2019)) {
  sp <- merge(munis, subset(mx_panel, year == i), by='inegi', all.x = TRUE)#merge data by year
  sp$groups_factor<-factor(sp$groups_factor, levels=c('0', '1', '2-4', '5-8', '>8'))#set factor levels
  
  # Create the plot
  mapplot<-ggplot() +
    geom_sf(data = sp, aes(fill = groups_factor), color = "light grey", size=.25) + # Make the boundaries transparent
    scale_fill_manual(values = brewer.pal(n = 5, name = "Greys"), name = "Groups") +
    labs(title = i) +
    theme_minimal() +
    theme(panel.grid = element_blank(),  # Remove gridlines
          axis.title = element_blank(), # Remove axis titles
          axis.text = element_blank(),# Remove axis text (ticks)
          plot.title = element_text(size = 20))  
  
  # Save the plot
  ggsave(paste(#'output/',
               "mexico_map", i, ".png", sep = ""), mapplot, width = 8, height = 6)
}

###########################################
############# Kingpin models###############
###########################################

####Defining treatments

#ever treated municipalities
mx_panel$kingpin_treated<-ifelse(mx_panel$muni_id%in%unique(mx_panel$muni_id[mx_panel$kingpin_tr==1]), 1, 0)

#ensuring all non-treated years are 0
mx_panel$kingpin_tr[is.na(mx_panel$kingpin_tr)]<-0

#event study treatment (grouping)
mx_panel$kingpinwp<-as.numeric(as.character(mx_panel$kingpin_tofrom))
mx_panel$kingpinwp[mx_panel$kingpin_tofrom< -3]<-'pre'
mx_panel$kingpinwp[mx_panel$kingpin_tofrom > 2]<-'post'

mx_panel$kingpinwp[is.na(mx_panel$kingpinwp)&mx_panel$kingpin_treated==1]<-'pre'
mx_panel$kingpinwp[is.na(mx_panel$kingpinwp)&mx_panel$kingpin_treated==0]<-'never_treated'

mx_panel$kingpinwp<-factor(mx_panel$kingpinwp, levels=c('-1', 'pre', 'never_treated', '-3', '-2', '0', '1', '2', 'post'))

############## Results

####################################
#####TABLE 1: Kingpin DID
models<-returnModels( c('dominant', 'small_groups', 'emergence_small', 'expansion_small'),'kingpin_tr',
             data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              depvar=FALSE,
              fontsize='small',
              drop=c('NOM_EST', 'PAN party', 'Marijuana', 'Poppy'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/kingpin_criminal.tex')
table

####################################
#####Figure 2: Kingpin Event Study

es_models<-returnModels( c('dominant', 'small_groups', 'emergence_small', 'expansion_small'),'kingpinwp',
                         data=mx_panel, covariates='+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')
es_models

es_data1<-event_study_data(es_models[1:2], c('Major', 'Minor'), -1, 'kingpinwp')

sub_plot_kingpin<-es_base_plot+geom_line(data=es_data1, aes(x=as.numeric(year), y=estimate, group=dv, linetype=dv))+
  geom_ribbon(data=es_data1, aes(x=as.numeric(year), y=estimate, group=dv,ymin=conf.low, ymax=conf.high, fill=dv), linetype=2, alpha=0.2)+
  geom_point(data=es_data1, aes(x=as.numeric(year), y=estimate, group=dv), size=1)+
  scale_fill_manual(values=c("dark grey", 'light grey'))+
  xlab("Year")+ylab("Change in Groups")+
  theme(legend.position='bottom', legend.text = element_text(size=16))+
  guides(linetype=guide_legend(title="Group Type"), fill='none')+
  geom_vline(xintercept=c(-1+.5), linetype='dotted')
sub_plot_kingpin

es_data2<-event_study_data(es_models[3:4], c('Emergence', 'Expansion'), -1, 'kingpinwp')

sub_small_plot_kingpin<-es_base_plot+geom_line(data=es_data2, aes(x=as.numeric(year), y=estimate, group=dv, linetype=dv))+
  geom_ribbon(data=es_data2, aes(x=as.numeric(year), y=estimate, group=dv,ymin=conf.low, ymax=conf.high, fill=dv), linetype=2, alpha=0.2)+
  geom_point(data=es_data2, aes(x=as.numeric(year), y=estimate, group=dv), size=1)+
  scale_fill_manual(values=c("dark grey", 'light grey'))+
  xlab("Year")+ylab("Change in Groups")+
  theme(legend.position='bottom', legend.text = element_text(size=16))+
  guides(linetype=guide_legend(title="Group Type"), fill='none')+
  geom_vline(xintercept=c(-1+.5), linetype='dotted')
sub_small_plot_kingpin
#ggsave('output/sub_plot_kingpin.pdf', plot= sub_plot_kingpin, height=4, width=6)
#ggsave('output/sub_small_plot_kingpin.pdf', plot= sub_small_plot_kingpin, height=4, width=6)

#############################################
#########Gas pipelines#######################
#############################################

############ defining treatments
mx_panel$gasxprice<-log(mx_panel$length_pre+1)*(mx_panel$inflation_adjusted_price_pesos/10) #length x price
mx_panel$gasbinxprice<-I(mx_panel$length_pre>0)*(mx_panel$inflation_adjusted_price_pesos/10) #presence x price
mx_panel$gasbin<-as.numeric(I(mx_panel$length_pre>0)) #binary pipeline presence
mx_panel$distxprice<-log(mx_panel$dist_vector_pre+1)*(mx_panel$inflation_adjusted_price_pesos/10) #distance x price
mx_panel$gas_post17<-log(mx_panel$length_pre+1)*I(mx_panel$year>=2017) #length x post-2017
mx_panel$gasxprice2020<-log(mx_panel$length_all+1)*(mx_panel$inflation_adjusted_price_pesos/10) #2020 pipeline x price


########### Results

##############################
########Table 2: Gas Pipelines

models<-returnModels( c('dominant', 'small_groups', 'emergence_small', 'expansion_small'),'gasxprice',
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              depvar=FALSE,
              fontsize='small',
              drop=c('NOM_EST', 'PAN party', 'Marijuana', 'Poppy'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/gas_criminal.tex')
table


######################################################
#####Appendix A: Additional Information##############
######################################################

tomas_by_year<-read.csv('tomas_by_year_pemex.csv', stringsAsFactors=F)

tomas_by_year_plot<-ggplot(tomas_by_year, aes(x=year, y=tomas))+geom_line()+theme_bw()+ ggtitle("Pipeline robbery incidents")+
  xlab("Year")+ylab("Fuel Theft Incidents")+
  theme(plot.title = element_text(hjust = 0.5))+ 
  theme(axis.text=element_text(size=14),
        legend.text=element_text(size=14),
        axis.title=element_text(size=16))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  geom_vline(xintercept=2010, linetype='dashed')
tomas_by_year_plot
#ggsave('output/tomas_by_year_plot.pdf', plot= tomas_by_year_plot, height=5, width=6)


gas_prices<-read.csv('gas_prices_annual_mexico.csv')
gas_prices_plot<-ggplot(gas_prices, aes(x=year, y=inflation_adjusted_price_pesos, color='red'))+geom_line()+
  geom_line(data=gas_prices, aes(x=year, y=gas_price_mexico_regular,color='blue'))+
  theme_bw()+ ggtitle("Fuel Prices in Mexico")+
  xlab("Year")+ylab("Pesos")+
  theme(plot.title = element_text(hjust = 0.5))+ 
  theme(axis.text=element_text(size=14),
        legend.text=element_text(size=14),
        axis.title=element_text(size=16))+
  scale_x_continuous(breaks= c(2000, 2005, 2010, 2015, 2020))+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank())+
  geom_vline(xintercept=2010, linetype='dashed')+
  guides(color=guide_legend(title="Price type"), fill=FALSE)  +
  scale_colour_manual(values = c("red","blue"),  
                      labels=c('Nominal', 'Real'))
gas_prices_plot
#ggsave('output/gas_prices_plot.pdf', plot= gas_prices_plot, height=5, width=7)


#########################################
############A.7: Full tables

models<-returnModels( c('dominant', 'small_groups', 'emergence', 'expansion'), c('kingpin_tr'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              depvar=FALSE,
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/full_tables_kp.tex')
table

models<-returnModels( c('dominant', 'small_groups', 'emergence', 'expansion'), c('gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              depvar=FALSE,
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/full_tables_gas.tex')
table

#########################################
##############A.8 Broken down by group type

models<-returnModels( c('splinters', 'cells', 'unaffiliated'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Splinters', 'Splinters', 'Cells', 'Cells', 'Unaffiliated', 'Unaffiliated'),
              depvar=FALSE,
              fontsize='small',
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/results_bytype.tex')
table

#emergence
models<-returnModels( c('splinters_emergence', 'cells_emergence', 'unaffiliated_emergence'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Splinters', 'Splinters', 'Cells', 'Cells', 'Unaffiliated', 'Unaffiliated'),
              depvar=FALSE,
              fontsize='small',
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/results_bytype_emergence.tex')
table

#expansion
models<-returnModels( c('splinters_expansion', 'cells_expansion', 'unaffiliated_expansion'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Splinters', 'Splinters', 'Cells', 'Cells', 'Unaffiliated', 'Unaffiliated'),
              depvar=FALSE,
              fontsize='small',
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/results_bytype_expansion.tex')
table

###################################
##### A.8.1 Alternative coding

#For splinters/cells
models<-returnModels( c('splinter_alt', 'cell_alt'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Splinters', 'Splinters', 'Cells', 'Cells'),
              depvar=FALSE,
              fontsize='small',
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/alternate_coding.tex')
table

# for affiliated v unaffiliated
models<-returnModels( c('affiliated', 'unaffiliated'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              headers=c('Affiliated', 'Affiliated', 'Unaffiliated', 'Unaffiliated'),
              depvar=FALSE,
              fontsize='small',
              drop=c('NOM_EST'),
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/affiliated.tex')
table


##########################################################
#############Appendix B: Robustness######################
#########################################################

####################################
######### B.1.1 Corroborated Groups

mx_panel$small_corroborated<-mx_panel$splinters_corroborated+mx_panel$cells_corroborated+mx_panel$unaffiliated_corroborated

models<-returnModels( c('small_corroborated'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')
table<-etable(models, tex=TRUE, 
              depvar=FALSE,
              fontsize='small',
              drop=c('NOM_EST'),
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/corroborated.tex')
table

#################################
######### B.1.2 Dropping Small Groups
mx_panel$small_m10<-mx_panel$splinters_m10+mx_panel$cells_m10+mx_panel$unaffiliated_m10

models<-returnModels( c('small_m10'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              depvar=FALSE,
              fontsize='small',
              drop=c('NOM_EST'),
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/m10.tex')

mx_panel$small_m10<-mx_panel$splinters_m10+mx_panel$cells_m10+mx_panel$unaffiliated_m10

models<-returnModels( c('small_m10'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              depvar=FALSE,
              fontsize='small',
              drop=c('NOM_EST'),
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/m10.tex')
table

###################################
#### B.1.3 Removing umbrella groups

mx_panel$small_noum<-mx_panel$small_groups-mx_panel$umbrella

models<-returnModels( c('small_noum'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              fontsize='small',
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/noumbrella.tex')
table

##################################
######B.1.4: Binary DV (presence)

mx_panel$dominant_bin<-as.numeric(I(mx_panel$dominant>0))
mx_panel$small_bin<-as.numeric(I(mx_panel$small_groups>0))
mx_panel$emergence_bin<-as.numeric(I(mx_panel$emergence_small>0))
mx_panel$expansion_bin<-as.numeric(I(mx_panel$expansion_small>0))

models<-returnModels( c('dominant_bin', 'small_bin', 'emergence_bin', 'expansion_bin'),
                      c('kingpin_tr'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/binary_kp.tex')
table

models<-returnModels( c('dominant_bin', 'small_bin', 'emergence_bin', 'expansion_bin'),
                      c('gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/binary_gas.tex')
table


################################
#### B.1.5. without backfilling the data
mx_panel$small_nobf<-mx_panel$splinters_nobf+mx_panel$cells_nobf+mx_panel$unaffiliated_nobf

models<-returnModels( c('dominant_nobf', 'small_nobf'), c('kingpin_tr', 'gasxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')

table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Major', 'Minor', 'Minor'),
              fontsize='small',
              order=c('Kingpin Removal', 'Gas Pipeline x Price'),
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/nobf.tex')
table


#######################################
######### Alternate Kingpin Treatments

############treatment always 'on'
models<-returnModels( c('dominant', 'small_groups', 'emergence_small', 'expansion_small'), c('kingpin_tr_on'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')
table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/tr_on.tex')
table

########### Treating affected only as municipalities treated in t

models<-returnModels( c('dominant', 'small_groups','emergence_small', 'expansion_small'), c('kingpin_tr_yof'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')
table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/kingpin_tr_yof.tex')
table

#######################################
######### Alternative Gas Treatments
models<-returnModels( c('dominant', 'small_groups', 'emergence_small', 'expansion_small'), c('gasbinxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')
table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/gas_bin.tex')
table

models<-returnModels( c('dominant', 'small_groups', 'emergence_small', 'expansion_small'), c('gas_post17'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')
table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/gas_post17.tex')
table

models<-returnModels( c('dominant', 'small_groups', 'emergence_small', 'expansion_small'), c('distxprice'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')
table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/distxprice.tex')
table

models<-returnModels( c('dominant', 'small_groups', 'emergence_small', 'expansion_small'), c('gasxprice2020'),
                      data=mx_panel, covariates='+pan+log(mariguana_kghec+1)+log(amapola_kghec+1)+NOM_EST:year')
table<-etable(models, tex=TRUE, 
              depvar=FALSE, 
              headers=c('Major', 'Minor', 'Emergence', 'Expansion'),
              fontsize='small',
              drop=c('NOM_EST'),
              signif.code = c("***"=0.001, "**"=0.01, "*"=0.05, '+'=.1),
              vcov='cluster')
write(table[3:length(table)-1], 'output/gasxprice2020.tex')
table


#############################
#####Other DID estimators###
##############################

########################
### chaisemartin
tidy.did_multiplegt = function(x, level = 0.95) {
  ests = x[grepl("^placebo_|^effect|^dynamic_", names(x))]
  ret = data.frame(
    term      = names(ests),
    estimate  = as.numeric(ests),
    std.error = as.numeric(x[grepl("^se_placebo|^se_effect|^se_dynamic", names(x))]),
    N         = as.numeric(x[grepl("^N_placebo|^N_effect|^N_dynamic", names(x))])
  ) |>
    # For CIs we'll assume standard normal distribution
    within({
      conf.low  = estimate - std.error*(qnorm(1-(1-level)/2))
      conf.high = estimate + std.error*(qnorm(1-(1-level)/2))
    })
  return(ret)
}

theme_set(theme_minimal()) 

mx_panel$log_mj<-log(mx_panel$mariguana_kghec+1)
mx_panel$log_poppy<-log(mx_panel$amapola_kghec+1)

set.seed(1234)
mod_dom=did_multiplegt(
  mx_panel, 'dominant',
  'muni_id', 'year', 'kingpin_tr',
  controls=c('pan', 'log_mj', 'log_poppy'),# original regression params
  dynamic   = 2,                  # no. of post-treatment periods
  placebo   = 2,                  # no. of pre-treatment periods
  brep      = 1000,                  # no. of bootstraps (required for SEs)
  cluster   = 'muni_id',                # variable to cluster SEs on
  parallel  = TRUE                 # run the bootstraps in parallel
)

theme_set(theme_minimal())

mod_dom_td = tidy.did_multiplegt(mod_dom)

mod_dom_plot<-mod_dom_td |>
  within({
    term = gsub("^placebo_", "-", term)
    term = gsub("^effect", "0", term)
    term = gsub("^dynamic_", "", term)
    term = as.integer(term)
  }) |>
  ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() + geom_line()+
  labs(
    x = "Time to treatment", y = "Effect size", title = "Major Cartels"
  )
mod_dom_plot
#ggsave('output/cdh_major.pdf', plot= mod_dom_plot, height=5, width=6)


mod_small=did_multiplegt(mx_panel, 'small_groups',
                         'muni_id', 'year', 'kingpin_tr',
                         controls=c('pan', 'log_mj', 'log_poppy'),# original regression params
                         dynamic   = 2,                  # no. of post-treatment periods
                         placebo   = 2,                  # no. of pre-treatment periods
                         brep      = 1000,                  # no. of bootstraps (required for SEs)
                         cluster   = 'muni_id',                # variable to cluster SEs on
                         parallel  = TRUE                 # run the bootstraps in parallel
)

mod_small_td = tidy.did_multiplegt(mod_small)

mod_small_plot<-mod_small_td |>
  within({
    term = gsub("^placebo_", "-", term)
    term = gsub("^effect", "0", term)
    term = gsub("^dynamic_", "", term)
    term = as.integer(term)
  }) |>
  ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() + geom_line()+
  labs(
    x = "Time to treatment", y = "Effect size", title = "Minor Groups", 
    subtitle = "Chaisemartin and D'Haultfoeuille (2020)"
  )

mod_small_plot
#ggsave('output/cdh_minor.pdf', plot= mod_small_plot, height=5, width=6)

mod_emergence=did_multiplegt(
  mx_panel, 'emergence_small',
  'muni_id', 'year', 'kingpin_tr', 
  controls=c('pan', 'log_mj', 'log_poppy'),# original regression params
  dynamic   = 2,                  # no. of post-treatment periods
  placebo   = 2,                  # no. of pre-treatment periods
  brep      = 1000,                  # no. of bootstraps (required for SEs)
  cluster   = 'muni_id',                # variable to cluster SEs on
  parallel  = TRUE                 # run the bootstraps in parallel
)

mod_emergence_td = tidy.did_multiplegt(mod_emergence)

mod_emergence_plot<-mod_emergence_td |>
  within({
    term = gsub("^placebo_", "-", term)
    term = gsub("^effect", "0", term)
    term = gsub("^dynamic_", "", term)
    term = as.integer(term)
  }) |>
  ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() + geom_line()+
  labs(
    x = "Time to treatment", y = "Effect size", title = "Group Emergence"
  )

mod_emergence_plot
#ggsave('output/cdh_emergence.pdf', plot= mod_emergence_plot, height=5, width=6)



mod_expansion=did_multiplegt(
  mx_panel, 'expansion_small',
  'muni_id', 'year', 'kingpin_tr',                         
  controls=c('pan', 'log_mj', 'log_poppy'),# original regression params
  dynamic   = 2,                  # no. of post-treatment periods
  placebo   = 2,                  # no. of pre-treatment periods
  brep      = 1000,                  # no. of bootstraps (required for SEs)
  cluster   = 'muni_id',                # variable to cluster SEs on
  parallel  = TRUE                 # run the bootstraps in parallel
)


mod_expansion_td = tidy.did_multiplegt(mod_expansion)

mod_expansion_plot<-mod_expansion_td |>
  within({
    term = gsub("^placebo_", "-", term)
    term = gsub("^effect", "0", term)
    term = gsub("^dynamic_", "", term)
    term = as.integer(term)
  }) |>
  ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() + geom_line()+
  labs(
    x = "Time to treatment", y = "Effect size", title = "Group expansion"
  )

mod_expansion_plot
#ggsave('output/cdh_expansion.pdf', plot= mod_expansion_plot, height=5, width=6)


#######################################
####################Callaway Sant'Anna

#defining indicator
mx_panel$first_treat<-NA 

for (munid in unique(mx_panel$muni_id[mx_panel$kingpin_tr==1])){
  print(munid)
  first_year<-min(mx_panel$year[mx_panel$kingpin_tr==1&mx_panel$muni_id==munid], na.rm=T)
  
  mx_panel$first_treat[mx_panel$muni_id==munid]<-first_year
}

mx_panel$first_treat[is.na(mx_panel$first_treat)]<-0

mx_panel$muni_numeric<-as.numeric(as.factor(mx_panel$muni_id))

dominant_attgt <- att_gt(yname = "dominant",
                         tname = "year",
                         idname = "muni_numeric",
                         gname = "first_treat",
                         data = mx_panel
)

dominant.es <- aggte(dominant_attgt, type = "dynamic")
summary(dominant.es)

att<-dominant.es$overall.att
seatt<-dominant.es$overall.se

sac<-data.frame(cbind(att, seatt))
sac$outcome<-'Major'

ggdid(dominant.es)
#ggsave('output/major_did.pdf', plot= ggdid(dominant.es), height=6, width=8)


small_attgt <- att_gt(yname = "small_groups",
                      tname = "year",
                      idname = "muni_numeric",
                      gname = "first_treat",
                      data = mx_panel
)

small.es <- aggte(small_attgt, type = "dynamic")

att<-small.es$overall.att
seatt<-small.es$overall.se

sac_new<-data.frame(cbind(att, seatt))
sac_new$outcome<-'Minor'

sac<-rbind(sac, sac_new)

ggdid(small.es)
#ggsave('output/minor_did.pdf', plot= ggdid(small.es), height=6, width=8)


emergence_attgt <- att_gt(yname = "emergence_small",
                          tname = "year",
                          idname = "muni_numeric",
                          gname = "first_treat",
                          data = mx_panel
)

emergence.es <- aggte(emergence_attgt, type = "dynamic")
summary(emergence.es)

att<-emergence.es$overall.att
seatt<-emergence.es$overall.se

sac_new<-data.frame(cbind(att, seatt))
sac_new$outcome<-'Emergence'

sac<-rbind(sac, sac_new)

ggdid(emergence.es)
#ggsave('output/emergence_did.pdf', plot= ggdid(emergence.es), height=6, width=8)


expansion_attgt <- att_gt(yname = "expansion_small",
                          tname = "year",
                          idname = "muni_numeric",
                          gname = "first_treat",
                          data = mx_panel
)
expansion.es <- aggte(expansion_attgt, type = "dynamic")
summary(expansion.es)

att<-expansion.es$overall.att
seatt<-expansion.es$overall.se

sac_new<-data.frame(cbind(att, seatt))
sac_new$outcome<-'Expansion'

sac<-rbind(sac, sac_new)

ggdid(expansion.es)
#ggsave('output/expansion_did.pdf', plot= ggdid(expansion.es), height=6, width=8)


sac$ylow<-sac$att-sac$seatt*1.96
sac$yhigh<-sac$att+sac$seatt*1.96

sac$outcome<-factor(sac$outcome, c('Major', 'Minor', 'Emergence',
                                   'Expansion'))

#look of plot
coef_plot_base<-ggplot()+  theme_bw()+
  coord_flip()+  scale_y_continuous(name='Change in Groups')+
  theme(plot.title = element_text(hjust = 0.5, size=20),
        text = element_text(size=18),
        axis.text=element_text(size=18),
        legend.position='none')+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  scale_linetype_discrete(name='', guide=guide_legend())+
  xlab("")+  geom_hline(aes(yintercept=0), lty=4)+
  scale_shape_discrete(name='', guide=guide_legend())



callaway<-coef_plot_base+geom_pointrange(data=sac, aes(x=outcome, y=att, ymin=ylow, ymax=yhigh), 
                                         position=position_dodge(width=.4), size=.7)+
  scale_x_discrete(limits=rev)

callaway
#ggsave('output/callaway_att.pdf', plot= callaway, height=6, width=8)



